# figure settings
import scanpy as sc
import anndata as ad
import os
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=250,
frameon=True, vector_friendly=True,
color_map="YlGnBu", format='pdf', transparent=False,
ipython_format='png2x')
DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/processed_datasets/share_seq_brain"
#%ls $DATADIR
files = [os.path.join(DATADIR, 'old_anndata_brain_share_seq_geneactivity_rna_merged.h5ad')]
int_files = ['liger_jnmf_integration.h5ad',
'scJoint_integration.h5ad',
'seurat_cca_integration.h5ad',
'seurat_rpca_integration.h5ad']
DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/share_seq_brain_shared_genes_full"
#%ls $DATADIR
for i in int_files:
files.append(os.path.join(DATADIR, i))
data = ad.read(files[0])
sc.pp.pca(data)
sc.pp.neighbors(data)
sc.tl.umap(data)
sc.pl.umap(data, color='broad_annotations')
from multiprocessing import Process, Queue from time import sleep
def my_func(n, l): if n % 2 == 0: sleep(5) for i in l: print(f'Process {n}: {i}\n')
processes = [] my_list = range(1, 5) for i in range(8): p = Process(target=my_func, args=(i, my_list)) p.start() processes.append(p)
for p in processes: p.join()
# Load packages
import scanpy as sc
import anndata as ad
import os
import pandas as pd
import matplotlib.pyplot as plt
import scmoib
import numpy as np
import tqdm
During startup - Warning messages: 1: Setting LC_TIME failed, using "C" 2: Setting LC_MONETARY failed, using "C" 3: Setting LC_PAPER failed, using "C" 4: Setting LC_MEASUREMENT failed, using "C"
# figure settings
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=250,
frameon=True, vector_friendly=True,
color_map="YlGnBu", format='pdf', transparent=False,
ipython_format='png2x')
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`
DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/processed_datasets/share_seq_brain"
files = [os.path.join(DATADIR, 'old_anndata_brain_share_seq_geneactivity_rna_merged.h5ad')]
int_files = ['liger_jnmf_integration.h5ad', 'scJoint_integration.h5ad', 'seurat_cca_integration.h5ad', 'seurat_rpca_integration.h5ad']
DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/share_seq_brain_shared_genes_full"
for i in int_files: files.append(os.path.join(DATADIR, i))
ids = ['unintegrated', 'liger', 'scjoint', 'seurat_cca', 'seurat_rpca'] modes = ['raw', 'comps', 'comps', 'count', 'count'] col = ['broad_annotations'] * 5 reps = [None, 'X_inmf_embedding', 'X_scjoint_embedding', None, None] my_obj = scmoib.tl.MetricsCalculator() adata_list = []
DATADIR = "/mnt/znas/icb_zstore01/groups/ce01/workspace/mobisc_anna/processed_datasets/PBMC_10X/"
#%ls $DATADIR
files = [os.path.join(DATADIR, 'old_anndata_pbmc_10x_processed_merged_rna_first_full_feature.h5ad')]
int_files = ['liger/PBMC_10X_jNMF.h5ad',
'seurat_cca/pbmc_10x_seurat_cca_rna_first.h5ad',
'bindsc/pbmc_10x_bindsc_rna_first_full_feature.h5ad',
'scalex/RNA-ATAC_pbmc_10x_scalex_processed_full_feature.h5ad',
'seurat_rpca/pbmc_10x_seurat_rpca_rna_first.h5ad']
DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/"
%ls $DATADIR
for i in int_files:
files.append(os.path.join(DATADIR, i))
ids = ['unintegrated', 'liger', 'seurat_cca', 'bindsc', 'scalex', 'seurat_rpca']
modes = ['raw', 'comps', 'count', 'count', 'graph', 'count']
col = ['broad cell types', 'broad.cell.types'] + ['broad cell types'] * 4
reps = [None, 'X_iNMF', None, None, None, None]
my_obj = scmoib.tl.MetricsCalculator()
adata_list = []
PBMC_10x/ pbmc_jnmf.h5ad scalex/ share_seq_brain_shared_genes_full/ bindsc/ pbmc_uinmf.h5ad* seurat_cca/ liger/ scJoint/ seurat_rpca/
%%time
for i in tqdm.tqdm(files):
pbmc = ad.read(i)
adata_id = ids[files.index(i)]
clust_id = 'louvain'
ct_id = col[files.index(i)]
bc_list = list(pbmc.obs.index)
bc_list1 = bc_list[:len(bc_list) // 2]
bc_list2 = bc_list[len(bc_list) // 2:]
mode = modes[files.index(i)]
rep = reps[files.index(i)]
my_obj.check_anndata(pbmc, ct_id, mode=mode, use_rep=rep)
print('-' * 10)
my_obj.node_metrics(pbmc, adata_id, bc_list1, bc_list2, ct_id, n_jobs=8)
my_obj.spec_dist(pbmc, adata_id=adata_id, n_metr=10)
try:
my_obj._accuracy_paired_omics(pbmc, adata_id, bc_list1, bc_list2, 'omic_batch', clust_id)
my_obj._accuracy_paired_omics_per_cell_type(pbmc, adata_id, bc_list1, bc_list2,
'omic_batch', clust_id, ct_id)
my_obj.ami(pbmc, adata_id, ct_id, clust_id)
my_obj.ari(pbmc, adata_id, ct_id, clust_id)
my_obj.nmi(pbmc, adata_id, ct_id, clust_id)
my_obj.homogeneity(pbmc, adata_id, ct_id, clust_id)
except ValueError as err:
print(f"Trouble with {adata_id}")
print(err.args[0])
my_obj._graph_connectivity(pbmc, adata_id, ct_id)
try:
my_obj.silhouette(pbmc, adata_id, batch_key='omic_batch', embed='X_umap')
my_obj.silhouette_batch(pbmc, adata_id, batch_key='omic_batch', cell_label=ct_id, embed='X_umap')
except:
print('Silhouette failed')
my_obj._average_dist_between_matching_barcodes(pbmc, adata_id, bc_list1, bc_list2, metric='cosine')
my_obj.isolated_labels_score(pbmc, adata_id, ct_id, 'omic_batch', 'X_umap')
adata_list.append(pbmc)
my_obj.get_df()
0%| | 0/6 [00:00<?, ?it/s]
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
Clustering... step 0 got 23 at resolution 1.5 step 1 got 15 at resolution 0.75 step 2 got 11 at resolution 0.375 step 3 got 7 at resolution 0.1875 step 4 got 11 at resolution 0.28125 step 5 got 10 at resolution 0.234375 ----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning: Warning! You are running the high load graph method. Test run information: CPU: 8 cores Wall time: 6 minutes RAM: 4 GB(?) Number of barcode pairs: 11k /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/_node_metrics.py:51: RuntimeWarning: Mean of empty slice. /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. 17%|██████▊ | 1/6 [22:51<1:54:18, 1371.60s/it]/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/compat/__init__.py:183: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances']. This is where adjacency matrices should go now.
Running pre-flight check WARNING: .obsp["connectivities"] have not been computed using umap Clustering... step 0 got 36 at resolution 1.5 step 1 got 26 at resolution 0.75 step 2 got 18 at resolution 0.375 step 3 got 12 at resolution 0.1875 step 4 got 9 at resolution 0.09375 step 5 got 11 at resolution 0.140625 step 6 got 10 at resolution 0.1171875 ----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning: Warning! You are running the high load graph method. Test run information: CPU: 8 cores Wall time: 6 minutes RAM: 4 GB(?) Number of barcode pairs: 11k /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. 33%|██████████████▋ | 2/6 [31:03<56:57, 854.29s/it]
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
Clustering... step 0 got 21 at resolution 1.5 step 1 got 13 at resolution 0.75 step 2 got 9 at resolution 0.375 step 3 got 12 at resolution 0.5625 step 4 got 11 at resolution 0.46875 step 5 got 10 at resolution 0.421875 ----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning: Warning! You are running the high load graph method. Test run information: CPU: 8 cores Wall time: 6 minutes RAM: 4 GB(?) Number of barcode pairs: 11k /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. 50%|██████████████████████ | 3/6 [40:21<35:56, 718.78s/it]/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/h5py/_hl/dataset.py:541: DeprecationWarning: Passing None into shape arguments as an alias for () is deprecated.
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
Clustering... step 0 got 21 at resolution 1.5 step 1 got 16 at resolution 0.75 step 2 got 11 at resolution 0.375 step 3 got 8 at resolution 0.1875 step 4 got 9 at resolution 0.28125 step 5 got 11 at resolution 0.328125 step 6 got 9 at resolution 0.3046875 step 7 got 11 at resolution 0.31640625 step 8 got 9 at resolution 0.310546875 step 9 got 10 at resolution 0.3134765625 ----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning: Warning! You are running the high load graph method. Test run information: CPU: 8 cores Wall time: 6 minutes RAM: 4 GB(?) Number of barcode pairs: 11k /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. 67%|█████████████████████████████▎ | 4/6 [52:16<23:54, 717.30s/it]/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/h5py/_hl/dataset.py:541: DeprecationWarning: Passing None into shape arguments as an alias for () is deprecated.
Running pre-flight check Clustering... step 0 got 18 at resolution 1.5 step 1 got 13 at resolution 0.75 step 2 got 8 at resolution 0.375 step 3 got 11 at resolution 0.5625 step 4 got 10 at resolution 0.46875 ----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning: Warning! You are running the high load graph method. Test run information: CPU: 8 cores Wall time: 6 minutes RAM: 4 GB(?) Number of barcode pairs: 11k
KeysView(AxisArrays with keys: X_umap, latent) Silhouette failed
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. 83%|███████████████████████████████████ | 5/6 [1:05:11<12:18, 738.33s/it]
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
Clustering... step 0 got 21 at resolution 1.5 step 1 got 16 at resolution 0.75 step 2 got 11 at resolution 0.375 step 3 got 8 at resolution 0.1875 step 4 got 9 at resolution 0.28125 step 5 got 10 at resolution 0.328125 ----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning: Warning! You are running the high load graph method. Test run information: CPU: 8 cores Wall time: 6 minutes RAM: 4 GB(?) Number of barcode pairs: 11k /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. 100%|██████████████████████████████████████████| 6/6 [1:15:55<00:00, 759.24s/it]
CPU times: user 3h 30min 13s, sys: 1h 20min 46s, total: 4h 51min Wall time: 1h 15min 55s
| conn_ratio | spec_dist_10 | accuracy | AMI | ARI | NMI | homogeneity | graph connectivity broad cell types | sil_global | sil_clus | pairwise_distance | il_sil | il_clus | graph connectivity broad.cell.types | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unintegrated | 0.0 | NaN | 0.000000 | 0.601217 | 0.421081 | 0.601592 | 0.615105 | 0.499798 | 0.723077 | 0.349730 | 0.761411 | NaN | NaN | NaN |
| liger | 1.0 | 0.629795 | 0.322143 | 0.532695 | 0.448401 | 0.533130 | 0.551172 | NaN | 0.652315 | 0.466547 | 0.861671 | NaN | NaN | 0.633627 |
| seurat_cca | 1.0 | 0.938760 | 0.849163 | 0.729525 | 0.646358 | 0.729793 | 0.710242 | 0.979320 | 0.507945 | 0.720455 | 0.837083 | NaN | NaN | NaN |
| bindsc | 1.0 | 0.398853 | 0.010280 | 0.629792 | 0.457215 | 0.630131 | 0.661708 | 0.936445 | 0.524120 | 0.704856 | 0.687280 | NaN | NaN | NaN |
| scalex | 1.0 | 0.875036 | 0.388737 | 0.666833 | 0.482170 | 0.667135 | 0.707853 | 0.988257 | NaN | NaN | 0.705320 | NaN | NaN | NaN |
| seurat_rpca | 1.0 | 0.836273 | 0.701055 | 0.635951 | 0.578401 | 0.636312 | 0.621872 | 0.966974 | 0.520412 | 0.771678 | 0.890710 | NaN | NaN | NaN |
scmoib.pl.cumulative_node_group(adata_list[1:], ids[1:])
df = my_obj.get_df()
df
| conn_ratio | spec_dist_10 | accuracy | AMI | ARI | NMI | homogeneity | graph connectivity broad cell types | sil_global | sil_clus | pairwise_distance | il_sil | il_clus | graph connectivity broad.cell.types | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| unintegrated | 0.0 | NaN | 0.000000 | 0.601217 | 0.421081 | 0.601592 | 0.615105 | 0.499798 | 0.723077 | 0.349730 | 0.761411 | NaN | NaN | NaN |
| liger | 1.0 | 0.629795 | 0.322143 | 0.532695 | 0.448401 | 0.533130 | 0.551172 | NaN | 0.652315 | 0.466547 | 0.861671 | NaN | NaN | 0.633627 |
| seurat_cca | 1.0 | 0.938760 | 0.849163 | 0.729525 | 0.646358 | 0.729793 | 0.710242 | 0.979320 | 0.507945 | 0.720455 | 0.837083 | NaN | NaN | NaN |
| bindsc | 1.0 | 0.398853 | 0.010280 | 0.629792 | 0.457215 | 0.630131 | 0.661708 | 0.936445 | 0.524120 | 0.704856 | 0.687280 | NaN | NaN | NaN |
| scalex | 1.0 | 0.875036 | 0.388737 | 0.666833 | 0.482170 | 0.667135 | 0.707853 | 0.988257 | NaN | NaN | 0.705320 | NaN | NaN | NaN |
| seurat_rpca | 1.0 | 0.836273 | 0.701055 | 0.635951 | 0.578401 | 0.636312 | 0.621872 | 0.966974 | 0.520412 | 0.771678 | 0.890710 | NaN | NaN | NaN |
df.drop(columns=['conn_ratio', 'il_clus', 'il_sil'], inplace=True)
df
| spec_dist_10 | accuracy | AMI | ARI | NMI | homogeneity | graph connectivity broad cell types | sil_global | sil_clus | pairwise_distance | graph connectivity broad.cell.types | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| unintegrated | NaN | 0.000000 | 0.601217 | 0.421081 | 0.601592 | 0.615105 | 0.499798 | 0.723077 | 0.349730 | 0.761411 | NaN |
| liger | 0.629795 | 0.322143 | 0.532695 | 0.448401 | 0.533130 | 0.551172 | NaN | 0.652315 | 0.466547 | 0.861671 | 0.633627 |
| seurat_cca | 0.938760 | 0.849163 | 0.729525 | 0.646358 | 0.729793 | 0.710242 | 0.979320 | 0.507945 | 0.720455 | 0.837083 | NaN |
| bindsc | 0.398853 | 0.010280 | 0.629792 | 0.457215 | 0.630131 | 0.661708 | 0.936445 | 0.524120 | 0.704856 | 0.687280 | NaN |
| scalex | 0.875036 | 0.388737 | 0.666833 | 0.482170 | 0.667135 | 0.707853 | 0.988257 | NaN | NaN | 0.705320 | NaN |
| seurat_rpca | 0.836273 | 0.701055 | 0.635951 | 0.578401 | 0.636312 | 0.621872 | 0.966974 | 0.520412 | 0.771678 | 0.890710 | NaN |
df.sum(axis=1)
unintegrated 4.573011 liger 5.631496 seurat_cca 7.648644 bindsc 5.640680 scalex 5.481340 seurat_rpca 7.159638 dtype: float64
for i,j in zip(adata_list, ids):
print(j)
try:
sc.pl.umap(i, color=['broad cell types', 'omic_batch'], wspace=0.7)
except:
sc.pl.umap(i, color=['broad.cell.types', 'omic_batch'], wspace=0.7)
unintegrated
liger
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. ... storing 'orig.ident' as categorical /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. ... storing 'filter_barcodes' as categorical /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. ... storing 'louvain2_rna' as categorical /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. ... storing 'filter' as categorical /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. ... storing 'omic_batch' as categorical /home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. ... storing 'group' as categorical
<Figure size 1088x320 with 0 Axes>
seurat_cca
bindsc
scalex
seurat_rpca